; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -mattr=+sse  < %s | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt -S -slp-vectorizer -mattr=+avx512f < %s | FileCheck %s --check-prefixes=CHECK,AVX512

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Scalar helper: performs a signed less-than comparison of %x and %y, picks %b
; when the comparison holds and %a otherwise, and returns the low 8 bits of the
; selected value.
; The assertions inside the body use the prefix that both RUN lines share, and
; they expect the same icmp/select/trunc/ret sequence in the output, i.e. this
; function is expected to stay in scalar form under both configurations.
; Function Attrs: norecurse nounwind readnone uwtable
define zeroext i8 @foo(i32 %x, i32 %y, i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[B_A:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 [[A:%.*]]
; CHECK-NEXT:    [[RETVAL_0:%.*]] = trunc i32 [[B_A]] to i8
; CHECK-NEXT:    ret i8 [[RETVAL_0]]
;
entry:
  ; Signed comparison: true when %x < %y.
  %cmp = icmp slt i32 %x, %y
  ; Yields %b when %cmp is true, %a otherwise.
  %b.a = select i1 %cmp, i32 %b, i32 %a
  ; Keep only the low 8 bits of the selected 32-bit value.
  %retval.0 = trunc i32 %b.a to i8
  ret i8 %retval.0
}

define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i8* noalias nocapture readonly %d, i8* noalias nocapture %e, i32 %w) local_unnamed_addr #1 {
; SSE-LABEL: @bar(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[W:%.*]], i32 0
; SSE-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
; SSE-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
; SSE-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> poison, i32 [[W]], i32 0
; SSE-NEXT:    [[SHUFFLE3:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> zeroinitializer
; SSE-NEXT:    br label [[FOR_BODY:%.*]]
; SSE:       for.body:
; SSE-NEXT:    [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[A_ADDR_0355:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[E_ADDR_0354:%.*]] = phi i8* [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
; SSE-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
; SSE-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
; SSE-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
; SSE-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
; SSE-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
; SSE-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
; SSE-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
; SSE-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; SSE-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; SSE-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
; SSE-NEXT:    [[TMP4:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>*
; SSE-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
; SSE-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
; SSE-NEXT:    [[TMP6:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>*
; SSE-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1
; SSE-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
; SSE-NEXT:    [[TMP8:%.*]] = bitcast i8* [[A_ADDR_0355]] to <4 x i8>*
; SSE-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
; SSE-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
; SSE-NEXT:    [[TMP10:%.*]] = bitcast i8* [[B_ADDR_0351]] to <4 x i8>*
; SSE-NEXT:    [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1
; SSE-NEXT:    [[TMP12:%.*]] = icmp ult <4 x i8> [[TMP5]], [[TMP7]]
; SSE-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP12]], <4 x i8> [[TMP11]], <4 x i8> [[TMP9]]
; SSE-NEXT:    [[TMP14:%.*]] = zext <4 x i8> [[TMP13]] to <4 x i32>
; SSE-NEXT:    [[TMP15:%.*]] = mul <4 x i32> [[TMP14]], [[SHUFFLE]]
; SSE-NEXT:    [[TMP16:%.*]] = trunc <4 x i32> [[TMP15]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
; SSE-NEXT:    [[TMP17:%.*]] = bitcast i8* [[E_ADDR_0354]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP16]], <4 x i8>* [[TMP17]], align 1
; SSE-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
; SSE-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
; SSE-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
; SSE-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
; SSE-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
; SSE-NEXT:    [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
; SSE-NEXT:    [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
; SSE-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
; SSE-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
; SSE-NEXT:    [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
; SSE-NEXT:    [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
; SSE-NEXT:    [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
; SSE-NEXT:    [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
; SSE-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; SSE-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; SSE-NEXT:    [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
; SSE-NEXT:    [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>*
; SSE-NEXT:    [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
; SSE-NEXT:    [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
; SSE-NEXT:    [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>*
; SSE-NEXT:    [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
; SSE-NEXT:    [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
; SSE-NEXT:    [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX49]] to <4 x i8>*
; SSE-NEXT:    [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1
; SSE-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
; SSE-NEXT:    [[TMP24:%.*]] = bitcast i8* [[ARRAYIDX52]] to <4 x i8>*
; SSE-NEXT:    [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1
; SSE-NEXT:    [[TMP26:%.*]] = icmp ult <4 x i8> [[TMP19]], [[TMP21]]
; SSE-NEXT:    [[TMP27:%.*]] = select <4 x i1> [[TMP26]], <4 x i8> [[TMP25]], <4 x i8> [[TMP23]]
; SSE-NEXT:    [[TMP28:%.*]] = zext <4 x i8> [[TMP27]] to <4 x i32>
; SSE-NEXT:    [[TMP29:%.*]] = mul <4 x i32> [[TMP28]], [[SHUFFLE1]]
; SSE-NEXT:    [[TMP30:%.*]] = trunc <4 x i32> [[TMP29]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
; SSE-NEXT:    [[TMP31:%.*]] = bitcast i8* [[ARRAYIDX56]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP30]], <4 x i8>* [[TMP31]], align 1
; SSE-NEXT:    [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
; SSE-NEXT:    [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
; SSE-NEXT:    [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
; SSE-NEXT:    [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
; SSE-NEXT:    [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
; SSE-NEXT:    [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
; SSE-NEXT:    [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
; SSE-NEXT:    [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
; SSE-NEXT:    [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
; SSE-NEXT:    [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
; SSE-NEXT:    [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
; SSE-NEXT:    [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
; SSE-NEXT:    [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
; SSE-NEXT:    [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; SSE-NEXT:    [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; SSE-NEXT:    [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
; SSE-NEXT:    [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>*
; SSE-NEXT:    [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
; SSE-NEXT:    [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
; SSE-NEXT:    [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>*
; SSE-NEXT:    [[TMP35:%.*]] = load <4 x i8>, <4 x i8>* [[TMP34]], align 1
; SSE-NEXT:    [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
; SSE-NEXT:    [[TMP36:%.*]] = bitcast i8* [[ARRAYIDX97]] to <4 x i8>*
; SSE-NEXT:    [[TMP37:%.*]] = load <4 x i8>, <4 x i8>* [[TMP36]], align 1
; SSE-NEXT:    [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
; SSE-NEXT:    [[TMP38:%.*]] = bitcast i8* [[ARRAYIDX100]] to <4 x i8>*
; SSE-NEXT:    [[TMP39:%.*]] = load <4 x i8>, <4 x i8>* [[TMP38]], align 1
; SSE-NEXT:    [[TMP40:%.*]] = icmp ult <4 x i8> [[TMP33]], [[TMP35]]
; SSE-NEXT:    [[TMP41:%.*]] = select <4 x i1> [[TMP40]], <4 x i8> [[TMP39]], <4 x i8> [[TMP37]]
; SSE-NEXT:    [[TMP42:%.*]] = zext <4 x i8> [[TMP41]] to <4 x i32>
; SSE-NEXT:    [[TMP43:%.*]] = mul <4 x i32> [[TMP42]], [[SHUFFLE2]]
; SSE-NEXT:    [[TMP44:%.*]] = trunc <4 x i32> [[TMP43]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
; SSE-NEXT:    [[TMP45:%.*]] = bitcast i8* [[ARRAYIDX104]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP44]], <4 x i8>* [[TMP45]], align 1
; SSE-NEXT:    [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
; SSE-NEXT:    [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
; SSE-NEXT:    [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
; SSE-NEXT:    [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
; SSE-NEXT:    [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
; SSE-NEXT:    [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
; SSE-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
; SSE-NEXT:    [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
; SSE-NEXT:    [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
; SSE-NEXT:    [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
; SSE-NEXT:    [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
; SSE-NEXT:    [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
; SSE-NEXT:    [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
; SSE-NEXT:    [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; SSE-NEXT:    [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; SSE-NEXT:    [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
; SSE-NEXT:    [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>*
; SSE-NEXT:    [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1
; SSE-NEXT:    [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
; SSE-NEXT:    [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>*
; SSE-NEXT:    [[TMP49:%.*]] = load <4 x i8>, <4 x i8>* [[TMP48]], align 1
; SSE-NEXT:    [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
; SSE-NEXT:    [[TMP50:%.*]] = bitcast i8* [[ARRAYIDX145]] to <4 x i8>*
; SSE-NEXT:    [[TMP51:%.*]] = load <4 x i8>, <4 x i8>* [[TMP50]], align 1
; SSE-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
; SSE-NEXT:    [[TMP52:%.*]] = bitcast i8* [[ARRAYIDX148]] to <4 x i8>*
; SSE-NEXT:    [[TMP53:%.*]] = load <4 x i8>, <4 x i8>* [[TMP52]], align 1
; SSE-NEXT:    [[TMP54:%.*]] = icmp ult <4 x i8> [[TMP47]], [[TMP49]]
; SSE-NEXT:    [[TMP55:%.*]] = select <4 x i1> [[TMP54]], <4 x i8> [[TMP53]], <4 x i8> [[TMP51]]
; SSE-NEXT:    [[TMP56:%.*]] = zext <4 x i8> [[TMP55]] to <4 x i32>
; SSE-NEXT:    [[TMP57:%.*]] = mul <4 x i32> [[TMP56]], [[SHUFFLE3]]
; SSE-NEXT:    [[TMP58:%.*]] = trunc <4 x i32> [[TMP57]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
; SSE-NEXT:    [[TMP59:%.*]] = bitcast i8* [[ARRAYIDX152]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP58]], <4 x i8>* [[TMP59]], align 1
; SSE-NEXT:    [[INC]] = add nuw nsw i32 [[I_0356]], 1
; SSE-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
; SSE-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
; SSE-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 16
; SSE-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 16
; SSE-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 16
; SSE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 8
; SSE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; SSE:       for.end:
; SSE-NEXT:    ret void
;
; AVX512-LABEL: @bar(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[W:%.*]], i32 0
; AVX512-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP0]], <16 x i32> poison, <16 x i32> zeroinitializer
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
; AVX512:       for.body:
; AVX512-NEXT:    [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[A_ADDR_0355:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[E_ADDR_0354:%.*]] = phi i8* [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
; AVX512-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
; AVX512-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
; AVX512-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
; AVX512-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
; AVX512-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
; AVX512-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
; AVX512-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
; AVX512-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; AVX512-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; AVX512-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
; AVX512-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
; AVX512-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
; AVX512-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
; AVX512-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
; AVX512-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
; AVX512-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
; AVX512-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
; AVX512-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
; AVX512-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
; AVX512-NEXT:    [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
; AVX512-NEXT:    [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
; AVX512-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
; AVX512-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
; AVX512-NEXT:    [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
; AVX512-NEXT:    [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
; AVX512-NEXT:    [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
; AVX512-NEXT:    [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
; AVX512-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; AVX512-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; AVX512-NEXT:    [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
; AVX512-NEXT:    [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
; AVX512-NEXT:    [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
; AVX512-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
; AVX512-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
; AVX512-NEXT:    [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
; AVX512-NEXT:    [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
; AVX512-NEXT:    [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
; AVX512-NEXT:    [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
; AVX512-NEXT:    [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
; AVX512-NEXT:    [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
; AVX512-NEXT:    [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
; AVX512-NEXT:    [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
; AVX512-NEXT:    [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
; AVX512-NEXT:    [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
; AVX512-NEXT:    [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
; AVX512-NEXT:    [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
; AVX512-NEXT:    [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
; AVX512-NEXT:    [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; AVX512-NEXT:    [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; AVX512-NEXT:    [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
; AVX512-NEXT:    [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
; AVX512-NEXT:    [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
; AVX512-NEXT:    [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
; AVX512-NEXT:    [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
; AVX512-NEXT:    [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
; AVX512-NEXT:    [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
; AVX512-NEXT:    [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
; AVX512-NEXT:    [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
; AVX512-NEXT:    [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
; AVX512-NEXT:    [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
; AVX512-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
; AVX512-NEXT:    [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
; AVX512-NEXT:    [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
; AVX512-NEXT:    [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
; AVX512-NEXT:    [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
; AVX512-NEXT:    [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
; AVX512-NEXT:    [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
; AVX512-NEXT:    [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; AVX512-NEXT:    [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; AVX512-NEXT:    [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
; AVX512-NEXT:    [[TMP1:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
; AVX512-NEXT:    [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
; AVX512-NEXT:    [[TMP3:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1
; AVX512-NEXT:    [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
; AVX512-NEXT:    [[TMP5:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>*
; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[TMP5]], align 1
; AVX512-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
; AVX512-NEXT:    [[TMP7:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>*
; AVX512-NEXT:    [[TMP8:%.*]] = load <16 x i8>, <16 x i8>* [[TMP7]], align 1
; AVX512-NEXT:    [[TMP9:%.*]] = icmp ult <16 x i8> [[TMP2]], [[TMP4]]
; AVX512-NEXT:    [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i8> [[TMP8]], <16 x i8> [[TMP6]]
; AVX512-NEXT:    [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32>
; AVX512-NEXT:    [[TMP12:%.*]] = mul <16 x i32> [[TMP11]], [[SHUFFLE]]
; AVX512-NEXT:    [[TMP13:%.*]] = trunc <16 x i32> [[TMP12]] to <16 x i8>
; AVX512-NEXT:    [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
; AVX512-NEXT:    [[TMP14:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>*
; AVX512-NEXT:    store <16 x i8> [[TMP13]], <16 x i8>* [[TMP14]], align 1
; AVX512-NEXT:    [[INC]] = add nuw nsw i32 [[I_0356]], 1
; AVX512-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
; AVX512-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
; AVX512-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 16
; AVX512-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 16
; AVX512-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 16
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 8
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.0356 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %a.addr.0355 = phi i8* [ %a, %entry ], [ %add.ptr, %for.body ]
  %e.addr.0354 = phi i8* [ %e, %entry ], [ %add.ptr192, %for.body ]
  %d.addr.0353 = phi i8* [ %d, %entry ], [ %add.ptr191, %for.body ]
  %c.addr.0352 = phi i8* [ %c, %entry ], [ %add.ptr190, %for.body ]
  %b.addr.0351 = phi i8* [ %b, %entry ], [ %add.ptr189, %for.body ]
  %0 = load i8, i8* %c.addr.0352, align 1
  %1 = load i8, i8* %d.addr.0353, align 1
  %2 = load i8, i8* %a.addr.0355, align 1
  %3 = load i8, i8* %b.addr.0351, align 1
  %cmp.i = icmp ult i8 %0, %1
  %b.a.i.v.v = select i1 %cmp.i, i8 %3, i8 %2
  %b.a.i.v = zext i8 %b.a.i.v.v to i32
  %b.a.i = mul i32 %b.a.i.v, %w
  %retval.0.i = trunc i32 %b.a.i to i8
  store i8 %retval.0.i, i8* %e.addr.0354, align 1
  %arrayidx9 = getelementptr inbounds i8, i8* %c.addr.0352, i64 1
  %4 = load i8, i8* %arrayidx9, align 1
  %arrayidx11 = getelementptr inbounds i8, i8* %d.addr.0353, i64 1
  %5 = load i8, i8* %arrayidx11, align 1
  %arrayidx13 = getelementptr inbounds i8, i8* %a.addr.0355, i64 1
  %6 = load i8, i8* %arrayidx13, align 1
  %arrayidx16 = getelementptr inbounds i8, i8* %b.addr.0351, i64 1
  %7 = load i8, i8* %arrayidx16, align 1
  %cmp.i348 = icmp ult i8 %4, %5
  %b.a.i349.v.v = select i1 %cmp.i348, i8 %7, i8 %6
  %b.a.i349.v = zext i8 %b.a.i349.v.v to i32
  %b.a.i349 = mul i32 %b.a.i349.v, %w
  %retval.0.i350 = trunc i32 %b.a.i349 to i8
  %arrayidx20 = getelementptr inbounds i8, i8* %e.addr.0354, i64 1
  store i8 %retval.0.i350, i8* %arrayidx20, align 1
  %arrayidx21 = getelementptr inbounds i8, i8* %c.addr.0352, i64 2
  %8 = load i8, i8* %arrayidx21, align 1
  %arrayidx23 = getelementptr inbounds i8, i8* %d.addr.0353, i64 2
  %9 = load i8, i8* %arrayidx23, align 1
  %arrayidx25 = getelementptr inbounds i8, i8* %a.addr.0355, i64 2
  %10 = load i8, i8* %arrayidx25, align 1
  %arrayidx28 = getelementptr inbounds i8, i8* %b.addr.0351, i64 2
  %11 = load i8, i8* %arrayidx28, align 1
  %cmp.i345 = icmp ult i8 %8, %9
  %b.a.i346.v.v = select i1 %cmp.i345, i8 %11, i8 %10
  %b.a.i346.v = zext i8 %b.a.i346.v.v to i32
  %b.a.i346 = mul i32 %b.a.i346.v, %w
  %retval.0.i347 = trunc i32 %b.a.i346 to i8
  %arrayidx32 = getelementptr inbounds i8, i8* %e.addr.0354, i64 2
  store i8 %retval.0.i347, i8* %arrayidx32, align 1
  %arrayidx33 = getelementptr inbounds i8, i8* %c.addr.0352, i64 3
  %12 = load i8, i8* %arrayidx33, align 1
  %arrayidx35 = getelementptr inbounds i8, i8* %d.addr.0353, i64 3
  %13 = load i8, i8* %arrayidx35, align 1
  %arrayidx37 = getelementptr inbounds i8, i8* %a.addr.0355, i64 3
  %14 = load i8, i8* %arrayidx37, align 1
  %arrayidx40 = getelementptr inbounds i8, i8* %b.addr.0351, i64 3
  %15 = load i8, i8* %arrayidx40, align 1
  %cmp.i342 = icmp ult i8 %12, %13
  %b.a.i343.v.v = select i1 %cmp.i342, i8 %15, i8 %14
  %b.a.i343.v = zext i8 %b.a.i343.v.v to i32
  %b.a.i343 = mul i32 %b.a.i343.v, %w
  %retval.0.i344 = trunc i32 %b.a.i343 to i8
  %arrayidx44 = getelementptr inbounds i8, i8* %e.addr.0354, i64 3
  store i8 %retval.0.i344, i8* %arrayidx44, align 1
  %arrayidx45 = getelementptr inbounds i8, i8* %c.addr.0352, i64 4
  %16 = load i8, i8* %arrayidx45, align 1
  %arrayidx47 = getelementptr inbounds i8, i8* %d.addr.0353, i64 4
  %17 = load i8, i8* %arrayidx47, align 1
  %arrayidx49 = getelementptr inbounds i8, i8* %a.addr.0355, i64 4
  %18 = load i8, i8* %arrayidx49, align 1
  %arrayidx52 = getelementptr inbounds i8, i8* %b.addr.0351, i64 4
  %19 = load i8, i8* %arrayidx52, align 1
  %cmp.i339 = icmp ult i8 %16, %17
  %b.a.i340.v.v = select i1 %cmp.i339, i8 %19, i8 %18
  %b.a.i340.v = zext i8 %b.a.i340.v.v to i32
  %b.a.i340 = mul i32 %b.a.i340.v, %w
  %retval.0.i341 = trunc i32 %b.a.i340 to i8
  %arrayidx56 = getelementptr inbounds i8, i8* %e.addr.0354, i64 4
  store i8 %retval.0.i341, i8* %arrayidx56, align 1
  %arrayidx57 = getelementptr inbounds i8, i8* %c.addr.0352, i64 5
  %20 = load i8, i8* %arrayidx57, align 1
  %arrayidx59 = getelementptr inbounds i8, i8* %d.addr.0353, i64 5
  %21 = load i8, i8* %arrayidx59, align 1
  %arrayidx61 = getelementptr inbounds i8, i8* %a.addr.0355, i64 5
  %22 = load i8, i8* %arrayidx61, align 1
  %arrayidx64 = getelementptr inbounds i8, i8* %b.addr.0351, i64 5
  %23 = load i8, i8* %arrayidx64, align 1
  %cmp.i336 = icmp ult i8 %20, %21
  %b.a.i337.v.v = select i1 %cmp.i336, i8 %23, i8 %22
  %b.a.i337.v = zext i8 %b.a.i337.v.v to i32
  %b.a.i337 = mul i32 %b.a.i337.v, %w
  %retval.0.i338 = trunc i32 %b.a.i337 to i8
  %arrayidx68 = getelementptr inbounds i8, i8* %e.addr.0354, i64 5
  store i8 %retval.0.i338, i8* %arrayidx68, align 1
  %arrayidx69 = getelementptr inbounds i8, i8* %c.addr.0352, i64 6
  %24 = load i8, i8* %arrayidx69, align 1
  %arrayidx71 = getelementptr inbounds i8, i8* %d.addr.0353, i64 6
  %25 = load i8, i8* %arrayidx71, align 1
  %arrayidx73 = getelementptr inbounds i8, i8* %a.addr.0355, i64 6
  %26 = load i8, i8* %arrayidx73, align 1
  %arrayidx76 = getelementptr inbounds i8, i8* %b.addr.0351, i64 6
  %27 = load i8, i8* %arrayidx76, align 1
  %cmp.i333 = icmp ult i8 %24, %25
  %b.a.i334.v.v = select i1 %cmp.i333, i8 %27, i8 %26
  %b.a.i334.v = zext i8 %b.a.i334.v.v to i32
  %b.a.i334 = mul i32 %b.a.i334.v, %w
  %retval.0.i335 = trunc i32 %b.a.i334 to i8
  %arrayidx80 = getelementptr inbounds i8, i8* %e.addr.0354, i64 6
  store i8 %retval.0.i335, i8* %arrayidx80, align 1
  %arrayidx81 = getelementptr inbounds i8, i8* %c.addr.0352, i64 7
  %28 = load i8, i8* %arrayidx81, align 1
  %arrayidx83 = getelementptr inbounds i8, i8* %d.addr.0353, i64 7
  %29 = load i8, i8* %arrayidx83, align 1
  %arrayidx85 = getelementptr inbounds i8, i8* %a.addr.0355, i64 7
  %30 = load i8, i8* %arrayidx85, align 1
  %arrayidx88 = getelementptr inbounds i8, i8* %b.addr.0351, i64 7
  %31 = load i8, i8* %arrayidx88, align 1
  %cmp.i330 = icmp ult i8 %28, %29
  %b.a.i331.v.v = select i1 %cmp.i330, i8 %31, i8 %30
  %b.a.i331.v = zext i8 %b.a.i331.v.v to i32
  %b.a.i331 = mul i32 %b.a.i331.v, %w
  %retval.0.i332 = trunc i32 %b.a.i331 to i8
  %arrayidx92 = getelementptr inbounds i8, i8* %e.addr.0354, i64 7
  store i8 %retval.0.i332, i8* %arrayidx92, align 1
  %arrayidx93 = getelementptr inbounds i8, i8* %c.addr.0352, i64 8
  %32 = load i8, i8* %arrayidx93, align 1
  %arrayidx95 = getelementptr inbounds i8, i8* %d.addr.0353, i64 8
  %33 = load i8, i8* %arrayidx95, align 1
  %arrayidx97 = getelementptr inbounds i8, i8* %a.addr.0355, i64 8
  %34 = load i8, i8* %arrayidx97, align 1
  %arrayidx100 = getelementptr inbounds i8, i8* %b.addr.0351, i64 8
  %35 = load i8, i8* %arrayidx100, align 1
  %cmp.i327 = icmp ult i8 %32, %33
  %b.a.i328.v.v = select i1 %cmp.i327, i8 %35, i8 %34
  %b.a.i328.v = zext i8 %b.a.i328.v.v to i32
  %b.a.i328 = mul i32 %b.a.i328.v, %w
  %retval.0.i329 = trunc i32 %b.a.i328 to i8
  %arrayidx104 = getelementptr inbounds i8, i8* %e.addr.0354, i64 8
  store i8 %retval.0.i329, i8* %arrayidx104, align 1
  %arrayidx105 = getelementptr inbounds i8, i8* %c.addr.0352, i64 9
  %36 = load i8, i8* %arrayidx105, align 1
  %arrayidx107 = getelementptr inbounds i8, i8* %d.addr.0353, i64 9
  %37 = load i8, i8* %arrayidx107, align 1
  %arrayidx109 = getelementptr inbounds i8, i8* %a.addr.0355, i64 9
  %38 = load i8, i8* %arrayidx109, align 1
  %arrayidx112 = getelementptr inbounds i8, i8* %b.addr.0351, i64 9
  %39 = load i8, i8* %arrayidx112, align 1
  %cmp.i324 = icmp ult i8 %36, %37
  %b.a.i325.v.v = select i1 %cmp.i324, i8 %39, i8 %38
  %b.a.i325.v = zext i8 %b.a.i325.v.v to i32
  %b.a.i325 = mul i32 %b.a.i325.v, %w
  %retval.0.i326 = trunc i32 %b.a.i325 to i8
  %arrayidx116 = getelementptr inbounds i8, i8* %e.addr.0354, i64 9
  store i8 %retval.0.i326, i8* %arrayidx116, align 1
  %arrayidx117 = getelementptr inbounds i8, i8* %c.addr.0352, i64 10
  %40 = load i8, i8* %arrayidx117, align 1
  %arrayidx119 = getelementptr inbounds i8, i8* %d.addr.0353, i64 10
  %41 = load i8, i8* %arrayidx119, align 1
  %arrayidx121 = getelementptr inbounds i8, i8* %a.addr.0355, i64 10
  %42 = load i8, i8* %arrayidx121, align 1
  %arrayidx124 = getelementptr inbounds i8, i8* %b.addr.0351, i64 10
  %43 = load i8, i8* %arrayidx124, align 1
  %cmp.i321 = icmp ult i8 %40, %41
  %b.a.i322.v.v = select i1 %cmp.i321, i8 %43, i8 %42
  %b.a.i322.v = zext i8 %b.a.i322.v.v to i32
  %b.a.i322 = mul i32 %b.a.i322.v, %w
  %retval.0.i323 = trunc i32 %b.a.i322 to i8
  %arrayidx128 = getelementptr inbounds i8, i8* %e.addr.0354, i64 10
  store i8 %retval.0.i323, i8* %arrayidx128, align 1
  %arrayidx129 = getelementptr inbounds i8, i8* %c.addr.0352, i64 11
  %44 = load i8, i8* %arrayidx129, align 1
  %arrayidx131 = getelementptr inbounds i8, i8* %d.addr.0353, i64 11
  %45 = load i8, i8* %arrayidx131, align 1
  %arrayidx133 = getelementptr inbounds i8, i8* %a.addr.0355, i64 11
  %46 = load i8, i8* %arrayidx133, align 1
  %arrayidx136 = getelementptr inbounds i8, i8* %b.addr.0351, i64 11
  %47 = load i8, i8* %arrayidx136, align 1
  %cmp.i318 = icmp ult i8 %44, %45
  %b.a.i319.v.v = select i1 %cmp.i318, i8 %47, i8 %46
  %b.a.i319.v = zext i8 %b.a.i319.v.v to i32
  %b.a.i319 = mul i32 %b.a.i319.v, %w
  %retval.0.i320 = trunc i32 %b.a.i319 to i8
  %arrayidx140 = getelementptr inbounds i8, i8* %e.addr.0354, i64 11
  store i8 %retval.0.i320, i8* %arrayidx140, align 1
  %arrayidx141 = getelementptr inbounds i8, i8* %c.addr.0352, i64 12
  %48 = load i8, i8* %arrayidx141, align 1
  %arrayidx143 = getelementptr inbounds i8, i8* %d.addr.0353, i64 12
  %49 = load i8, i8* %arrayidx143, align 1
  %arrayidx145 = getelementptr inbounds i8, i8* %a.addr.0355, i64 12
  %50 = load i8, i8* %arrayidx145, align 1
  %arrayidx148 = getelementptr inbounds i8, i8* %b.addr.0351, i64 12
  %51 = load i8, i8* %arrayidx148, align 1
  %cmp.i315 = icmp ult i8 %48, %49
  %b.a.i316.v.v = select i1 %cmp.i315, i8 %51, i8 %50
  %b.a.i316.v = zext i8 %b.a.i316.v.v to i32
  %b.a.i316 = mul i32 %b.a.i316.v, %w
  %retval.0.i317 = trunc i32 %b.a.i316 to i8
  %arrayidx152 = getelementptr inbounds i8, i8* %e.addr.0354, i64 12
  store i8 %retval.0.i317, i8* %arrayidx152, align 1
  %arrayidx153 = getelementptr inbounds i8, i8* %c.addr.0352, i64 13
  %52 = load i8, i8* %arrayidx153, align 1
  %arrayidx155 = getelementptr inbounds i8, i8* %d.addr.0353, i64 13
  %53 = load i8, i8* %arrayidx155, align 1
  %arrayidx157 = getelementptr inbounds i8, i8* %a.addr.0355, i64 13
  %54 = load i8, i8* %arrayidx157, align 1
  %arrayidx160 = getelementptr inbounds i8, i8* %b.addr.0351, i64 13
  %55 = load i8, i8* %arrayidx160, align 1
  %cmp.i312 = icmp ult i8 %52, %53
  %b.a.i313.v.v = select i1 %cmp.i312, i8 %55, i8 %54
  %b.a.i313.v = zext i8 %b.a.i313.v.v to i32
  %b.a.i313 = mul i32 %b.a.i313.v, %w
  %retval.0.i314 = trunc i32 %b.a.i313 to i8
  %arrayidx164 = getelementptr inbounds i8, i8* %e.addr.0354, i64 13
  store i8 %retval.0.i314, i8* %arrayidx164, align 1
  %arrayidx165 = getelementptr inbounds i8, i8* %c.addr.0352, i64 14
  %56 = load i8, i8* %arrayidx165, align 1
  %arrayidx167 = getelementptr inbounds i8, i8* %d.addr.0353, i64 14
  %57 = load i8, i8* %arrayidx167, align 1
  %arrayidx169 = getelementptr inbounds i8, i8* %a.addr.0355, i64 14
  %58 = load i8, i8* %arrayidx169, align 1
  %arrayidx172 = getelementptr inbounds i8, i8* %b.addr.0351, i64 14
  %59 = load i8, i8* %arrayidx172, align 1
  %cmp.i309 = icmp ult i8 %56, %57
  %b.a.i310.v.v = select i1 %cmp.i309, i8 %59, i8 %58
  %b.a.i310.v = zext i8 %b.a.i310.v.v to i32
  %b.a.i310 = mul i32 %b.a.i310.v, %w
  %retval.0.i311 = trunc i32 %b.a.i310 to i8
  %arrayidx176 = getelementptr inbounds i8, i8* %e.addr.0354, i64 14
  store i8 %retval.0.i311, i8* %arrayidx176, align 1
  %arrayidx177 = getelementptr inbounds i8, i8* %c.addr.0352, i64 15
  %60 = load i8, i8* %arrayidx177, align 1
  %arrayidx179 = getelementptr inbounds i8, i8* %d.addr.0353, i64 15
  %61 = load i8, i8* %arrayidx179, align 1
  %arrayidx181 = getelementptr inbounds i8, i8* %a.addr.0355, i64 15
  %62 = load i8, i8* %arrayidx181, align 1
  %arrayidx184 = getelementptr inbounds i8, i8* %b.addr.0351, i64 15
  %63 = load i8, i8* %arrayidx184, align 1
  %cmp.i306 = icmp ult i8 %60, %61
  %b.a.i307.v.v = select i1 %cmp.i306, i8 %63, i8 %62
  %b.a.i307.v = zext i8 %b.a.i307.v.v to i32
  %b.a.i307 = mul i32 %b.a.i307.v, %w
  %retval.0.i308 = trunc i32 %b.a.i307 to i8
  %arrayidx188 = getelementptr inbounds i8, i8* %e.addr.0354, i64 15
  store i8 %retval.0.i308, i8* %arrayidx188, align 1
  %inc = add nuw nsw i32 %i.0356, 1
  %add.ptr = getelementptr inbounds i8, i8* %a.addr.0355, i64 16
  %add.ptr189 = getelementptr inbounds i8, i8* %b.addr.0351, i64 16
  %add.ptr190 = getelementptr inbounds i8, i8* %c.addr.0352, i64 16
  %add.ptr191 = getelementptr inbounds i8, i8* %d.addr.0353, i64 16
  %add.ptr192 = getelementptr inbounds i8, i8* %e.addr.0354, i64 16
  %exitcond = icmp eq i32 %inc, 8
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; @ib: 64-element i32 input array, 16-byte aligned. Its initializer repeats the
; 8-value pattern 1, 1, 0, 0, 1, 0, 1, 0 eight times. Read by @foo1.
@ib = local_unnamed_addr global [64 x i32] [i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0], align 16
; @ia: 64-element i32 output array, zero-initialized, common linkage, 16-byte
; aligned. @foo1 stores the bitwise complement of each @ib element here.
@ia = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16

; @foo1 - SLP-vectorization test over the globals @ib and @ia.
;
; The entry block is a fully unrolled, straight-line sequence of 64 scalar
; load / xor -1 / store triples that writes the bitwise complement of @ib[i]
; into @ia[i] for i = 0..63. The autogenerated assertions below expect these
; triples to be merged into
;   * sixteen <4 x i32> load/xor/store groups when opt is given -mattr=+sse
;   * four <16 x i32> load/xor/store groups when opt is given -mattr=+avx512f
;
; The trailing loop (for.body5 / for.cond3) re-reads both arrays one element
; at a time and calls @abort if @ia[i] differs from the complement of @ib[i].
; The assertions expect that loop to stay scalar in both configurations.
; The function returns 0 once all 64 elements have been verified.
define i32 @foo1() local_unnamed_addr #0 {
; SSE-LABEL: @foo1(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
; SSE-NEXT:    br label [[FOR_BODY5:%.*]]
; SSE:       for.cond3:
; SSE-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
; SSE-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
; SSE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
; SSE:       for.body5:
; SSE-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
; SSE-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
; SSE-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
; SSE-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP33]], -1
; SSE-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
; SSE-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
; SSE:       if.then:
; SSE-NEXT:    tail call void @abort()
; SSE-NEXT:    unreachable
; SSE:       for.end14:
; SSE-NEXT:    ret i32 0
;
; AVX512-LABEL: @foo1(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([64 x i32]* @ib to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP1]], <16 x i32>* bitcast ([64 x i32]* @ia to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP5]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP7]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <16 x i32>*), align 16
; AVX512-NEXT:    br label [[FOR_BODY5:%.*]]
; AVX512:       for.cond3:
; AVX512-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
; AVX512-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
; AVX512-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
; AVX512:       for.body5:
; AVX512-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
; AVX512-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP9]], -1
; AVX512-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP8]], [[NEG10]]
; AVX512-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
; AVX512:       if.then:
; AVX512-NEXT:    tail call void @abort()
; AVX512-NEXT:    unreachable
; AVX512:       for.end14:
; AVX512-NEXT:    ret i32 0
;
entry:
  ; Unrolled scalar copy: @ia[i] = @ib[i] xor -1 for i = 0..63. The per-element
  ; alignments cycle 16/4/8/4 because both globals are 16-byte aligned and each
  ; element is 4 bytes wide.
  %0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16
  %neg = xor i32 %0, -1
  store i32 %neg, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 0), align 16
  %1 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 1), align 4
  %neg.1 = xor i32 %1, -1
  store i32 %neg.1, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 1), align 4
  %2 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 2), align 8
  %neg.2 = xor i32 %2, -1
  store i32 %neg.2, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 2), align 8
  %3 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 3), align 4
  %neg.3 = xor i32 %3, -1
  store i32 %neg.3, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 3), align 4
  %4 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4), align 16
  %neg.4 = xor i32 %4, -1
  store i32 %neg.4, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4), align 16
  %5 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 5), align 4
  %neg.5 = xor i32 %5, -1
  store i32 %neg.5, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 5), align 4
  %6 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 6), align 8
  %neg.6 = xor i32 %6, -1
  store i32 %neg.6, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 6), align 8
  %7 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 7), align 4
  %neg.7 = xor i32 %7, -1
  store i32 %neg.7, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 7), align 4
  %8 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8), align 16
  %neg.8 = xor i32 %8, -1
  store i32 %neg.8, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8), align 16
  %9 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 9), align 4
  %neg.9 = xor i32 %9, -1
  store i32 %neg.9, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 9), align 4
  %10 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 10), align 8
  %neg.10 = xor i32 %10, -1
  store i32 %neg.10, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 10), align 8
  %11 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 11), align 4
  %neg.11 = xor i32 %11, -1
  store i32 %neg.11, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 11), align 4
  %12 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12), align 16
  %neg.12 = xor i32 %12, -1
  store i32 %neg.12, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12), align 16
  %13 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 13), align 4
  %neg.13 = xor i32 %13, -1
  store i32 %neg.13, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 13), align 4
  %14 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 14), align 8
  %neg.14 = xor i32 %14, -1
  store i32 %neg.14, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 14), align 8
  %15 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 15), align 4
  %neg.15 = xor i32 %15, -1
  store i32 %neg.15, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 15), align 4
  %16 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16), align 16
  %neg.16 = xor i32 %16, -1
  store i32 %neg.16, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16), align 16
  %17 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 17), align 4
  %neg.17 = xor i32 %17, -1
  store i32 %neg.17, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 17), align 4
  %18 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 18), align 8
  %neg.18 = xor i32 %18, -1
  store i32 %neg.18, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 18), align 8
  %19 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 19), align 4
  %neg.19 = xor i32 %19, -1
  store i32 %neg.19, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 19), align 4
  %20 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20), align 16
  %neg.20 = xor i32 %20, -1
  store i32 %neg.20, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20), align 16
  %21 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 21), align 4
  %neg.21 = xor i32 %21, -1
  store i32 %neg.21, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 21), align 4
  %22 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 22), align 8
  %neg.22 = xor i32 %22, -1
  store i32 %neg.22, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 22), align 8
  %23 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 23), align 4
  %neg.23 = xor i32 %23, -1
  store i32 %neg.23, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 23), align 4
  %24 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24), align 16
  %neg.24 = xor i32 %24, -1
  store i32 %neg.24, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24), align 16
  %25 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 25), align 4
  %neg.25 = xor i32 %25, -1
  store i32 %neg.25, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 25), align 4
  %26 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 26), align 8
  %neg.26 = xor i32 %26, -1
  store i32 %neg.26, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 26), align 8
  %27 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 27), align 4
  %neg.27 = xor i32 %27, -1
  store i32 %neg.27, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 27), align 4
  %28 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28), align 16
  %neg.28 = xor i32 %28, -1
  store i32 %neg.28, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28), align 16
  %29 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 29), align 4
  %neg.29 = xor i32 %29, -1
  store i32 %neg.29, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 29), align 4
  %30 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 30), align 8
  %neg.30 = xor i32 %30, -1
  store i32 %neg.30, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 30), align 8
  %31 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 31), align 4
  %neg.31 = xor i32 %31, -1
  store i32 %neg.31, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 31), align 4
  %32 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32), align 16
  %neg.32 = xor i32 %32, -1
  store i32 %neg.32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32), align 16
  %33 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 33), align 4
  %neg.33 = xor i32 %33, -1
  store i32 %neg.33, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 33), align 4
  %34 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 34), align 8
  %neg.34 = xor i32 %34, -1
  store i32 %neg.34, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 34), align 8
  %35 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 35), align 4
  %neg.35 = xor i32 %35, -1
  store i32 %neg.35, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 35), align 4
  %36 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36), align 16
  %neg.36 = xor i32 %36, -1
  store i32 %neg.36, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36), align 16
  %37 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 37), align 4
  %neg.37 = xor i32 %37, -1
  store i32 %neg.37, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 37), align 4
  %38 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 38), align 8
  %neg.38 = xor i32 %38, -1
  store i32 %neg.38, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 38), align 8
  %39 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 39), align 4
  %neg.39 = xor i32 %39, -1
  store i32 %neg.39, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 39), align 4
  %40 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40), align 16
  %neg.40 = xor i32 %40, -1
  store i32 %neg.40, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40), align 16
  %41 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 41), align 4
  %neg.41 = xor i32 %41, -1
  store i32 %neg.41, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 41), align 4
  %42 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 42), align 8
  %neg.42 = xor i32 %42, -1
  store i32 %neg.42, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 42), align 8
  %43 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 43), align 4
  %neg.43 = xor i32 %43, -1
  store i32 %neg.43, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 43), align 4
  %44 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44), align 16
  %neg.44 = xor i32 %44, -1
  store i32 %neg.44, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44), align 16
  %45 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 45), align 4
  %neg.45 = xor i32 %45, -1
  store i32 %neg.45, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 45), align 4
  %46 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 46), align 8
  %neg.46 = xor i32 %46, -1
  store i32 %neg.46, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 46), align 8
  %47 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 47), align 4
  %neg.47 = xor i32 %47, -1
  store i32 %neg.47, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 47), align 4
  %48 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48), align 16
  %neg.48 = xor i32 %48, -1
  store i32 %neg.48, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48), align 16
  %49 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 49), align 4
  %neg.49 = xor i32 %49, -1
  store i32 %neg.49, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 49), align 4
  %50 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 50), align 8
  %neg.50 = xor i32 %50, -1
  store i32 %neg.50, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 50), align 8
  %51 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 51), align 4
  %neg.51 = xor i32 %51, -1
  store i32 %neg.51, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 51), align 4
  %52 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52), align 16
  %neg.52 = xor i32 %52, -1
  store i32 %neg.52, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52), align 16
  %53 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 53), align 4
  %neg.53 = xor i32 %53, -1
  store i32 %neg.53, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 53), align 4
  %54 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 54), align 8
  %neg.54 = xor i32 %54, -1
  store i32 %neg.54, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 54), align 8
  %55 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 55), align 4
  %neg.55 = xor i32 %55, -1
  store i32 %neg.55, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 55), align 4
  %56 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56), align 16
  %neg.56 = xor i32 %56, -1
  store i32 %neg.56, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56), align 16
  %57 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 57), align 4
  %neg.57 = xor i32 %57, -1
  store i32 %neg.57, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 57), align 4
  %58 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 58), align 8
  %neg.58 = xor i32 %58, -1
  store i32 %neg.58, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 58), align 8
  %59 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 59), align 4
  %neg.59 = xor i32 %59, -1
  store i32 %neg.59, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 59), align 4
  %60 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60), align 16
  %neg.60 = xor i32 %60, -1
  store i32 %neg.60, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60), align 16
  %61 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 61), align 4
  %neg.61 = xor i32 %61, -1
  store i32 %neg.61, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 61), align 4
  %62 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 62), align 8
  %neg.62 = xor i32 %62, -1
  store i32 %neg.62, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 62), align 8
  %63 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 63), align 4
  %neg.63 = xor i32 %63, -1
  store i32 %neg.63, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 63), align 4
  br label %for.body5

for.cond3:                                        ; preds = %for.body5
  ; Loop latch. The bound compare uses the pre-increment index, so the body
  ; executes for indices 0 through 63 before control leaves for for.end14.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp4 = icmp ult i64 %indvars.iv, 63
  br i1 %cmp4, label %for.body5, label %for.end14

for.body5:                                        ; preds = %entry, %for.cond3
  ; Verify one element: @ia[i] must equal @ib[i] xor -1, otherwise go to if.then.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond3 ]
  %arrayidx7 = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 %indvars.iv
  %64 = load i32, i32* %arrayidx7, align 4
  %arrayidx9 = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 %indvars.iv
  %65 = load i32, i32* %arrayidx9, align 4
  %neg10 = xor i32 %65, -1
  %cmp11 = icmp eq i32 %64, %neg10
  br i1 %cmp11, label %for.cond3, label %if.then

if.then:                                          ; preds = %for.body5
  ; Mismatch path: calls @abort; the IR marks the point after the call unreachable.
  tail call void @abort() #2
  unreachable

for.end14:                                        ; preds = %for.cond3
  ; Every element compared equal.
  ret i32 0
}

; External @abort, called from the if.then block of @foo1 when the element
; verification fails.
declare void @abort() #2
